clear
set more off

use combined_dataset_2024, clear
keep if banteammergers=="False"
keep if pubscore_normal~=.
drop if rewardquantity<5000

drop teamleaderid teamname scorefirstsubmitteddate publicleaderboardsubmissionid privateleaderboardsubmissionid isbenchmark medal medalawarddate sourcekernelversionid scoredate enableteammodels 
drop lastsubmissiondate hostsegmenttitle publicscoreleaderboarddisplay publicscorefullprecision privatescoreleaderboarddisplay privatescorefullprecision  hasleaderboard evaluationalgorithmismax maxteamsize banteammergers 

***************Table 1: contest level summary stats
preserve

bys competitionid teamid: gen aux = 1 if _n==1

*** Competitions without teams of size 2:
bys competitionid: egen total_teams2 = total(teamsize==2) if aux==1
bys competitionid: gen c=1 if _n==1
tab competitionid if c==1 & total_teams2==0 // competitions without teams of size 2:
* Competition 2863 does not have teams of size 2. It has teams of other sizes though.


** Competitions with teams of size 2 but with missing merger dates:
gen teams2_no_date_aux=(teamsize==2 & merger_time==. & aux==1)
bys competitionid: egen without_merger_date = total(teams2_no_date_aux)
gen ratio_no_dates=without_merger_date/total_teams2 if aux==1
tab competitionid if ratio_no_dates>0 & aux==1 & total_teams2>0 // competitions with teams of size 2 but no merger dates:

** Drop these 4 competition; no dates for 2-member teams:
drop if competitionid == 2445 | competitionid == 2467 | competitionid == 2479 | competitionid == 2863


** stats:
bys competitionid: gen total_submissions = _N
bys competitionid : egen total_teams = total(aux)
drop aux

bys competitionid submitteduserid: gen aux = 1 if _n==1
bys competitionid : egen total_users = total(aux)
drop aux


bys competitionid teamid: gen teamsize2 = teamsize if _n==1
sum teamsize2, d
tab teamsize2


gen sp=(teamsize2==1)

bys competitionid : egen sp_competition = total(sp)
gen fraction_sp=sp_competition/total_teams

bys competitionid : egen avteamsize = mean(teamsize2)
bys competitionid: keep if _n==1

log using Output/Table1.txt, replace text
estpost sum  total_submissions total_users total_teams fraction_sp rewardquantity
esttab, cell("count mean(fmt(%9.2f))  sd(fmt(%9.2f)) min(fmt(%9.2f)) max(fmt(%11.2f))") tex
log close

restore


***************Table 2: teamsize distribution
log using Output/Table2.txt, replace text
*Panel A
preserve
** Drop these 4 competitions, no dates for 2-member teams:
drop if competitionid == 2445 | competitionid == 2467 | competitionid == 2479 | competitionid == 2863

bys competitionid teamid: keep if _n==1
bys competitionid: gen tot_teams=_N

gen ateamsize1 = (teamsize==1)
gen ateamsize2 = (teamsize==2)
gen ateamsize3 = (teamsize>=3 &teamsize!=.)

bys competitionid: egen teamsize_1 = total(ateamsize1)
bys competitionid: egen teamsize_2 = total(ateamsize2)
bys competitionid: egen teamsize_3 = total(ateamsize3)

replace teamsize_1=teamsize_1/tot_teams*100
replace teamsize_2=teamsize_2/tot_teams*100
replace teamsize_3=teamsize_3/tot_teams*100


drop ateamsize*
bys competitionid: keep if _n==1
estpost sum  teamsize_*
esttab, cell("mean(fmt(%9.2f))  sd(fmt(%9.2f)) min(fmt(%9.2f)) max(fmt(%11.2f))") tex
restore

*Panel B
preserve
** Drop these 4 competitions, no dates for 2-member teams:
drop if competitionid == 2445 | competitionid == 2467 | competitionid == 2479 | competitionid == 2863
keep if privateleaderboardrank<=40
bys competitionid teamid: keep if _n==1
bys competitionid: gen tot_teams=_N

gen ateamsize1 = (teamsize==1)
gen ateamsize2 = (teamsize==2)
gen ateamsize3 = (teamsize>=3 &teamsize!=.)

bys competitionid: egen teamsize_1 = total(ateamsize1)
bys competitionid: egen teamsize_2 = total(ateamsize2)
bys competitionid: egen teamsize_3 = total(ateamsize3)

replace teamsize_1=teamsize_1/tot_teams*100
replace teamsize_2=teamsize_2/tot_teams*100
replace teamsize_3=teamsize_3/tot_teams*100

drop ateamsize*
bys competitionid: keep if _n==1

*Footnote empirical model. However, [XX] percent of multi-player teams are two-member teams.
gen stat2=teamsize_2/(teamsize_2+teamsize_3)*100
sum stat2,d


estpost sum  teamsize_*
esttab, cell("mean(fmt(%9.2f))  sd(fmt(%9.2f)) min(fmt(%9.2f)) max(fmt(%11.2f))") tex
restore
log close


****** Figure 1
use combined_dataset_2024, clear
preserve
keep if pubscore_normal~=.
drop if rewardquantity<500
drop if competitionid == 2445 | competitionid == 2467 | competitionid == 2479 | competitionid == 2863

keep if privateleaderboardrank~=.
bys teamid: keep if _n==1
keep if privateleaderboardrank<=40
keep privateleaderboardrank  teamsize
gen multiplayer=(teamsize>1)
graph bar multiplayer, over(privateleaderboardrank, relabel(1 "1" 2 " " 3 " " 4 " " 5 "5" 6 " " 7 " " 8 " " 9" " 10 "10" 11 " " 12 " " 13 " " 14 " " 15 "15" 16 " " 17 " " 18 " " 19" " 20 "20" 21 " " 22 " " 23 " " 24 " " 25 "25" 26 " " 27 " " 28 " " 29" " 30 "30" 31 " " 32 " " 33 " " 34 " " 35 "35" 36 " " 37 " " 38 " " 39" " 40 "40")) ytitle(Share of multiplayer teams, size(large)) title(Final ranking, position(6)  size(large)) graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) scheme(sj) 
graph export Output/Figure1.pdf, replace
restore
